miχpods: MOM6#

  • test out hvplot

  • build LES catalog

  • bootstrap error on mean, median?

  • Try daily vs hourly

  • add TAO χpods

  • fix EUC max at 110

Questions:

  • N2 vs N2T

  • match time intervals

  • match vertical resolution and extent

  • restrict TAO S2 to ADCP depth range

  • restrict to top 200m.

References#

Warner & Moum (2019)

Setup#

%load_ext watermark

import datetime
import glob
import os

import cf_xarray
import dask
import dcpy
import distributed
import flox.xarray
import holoviews as hv
import hvplot.xarray
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import xgcm

import pump
from pump import mixpods

hv.notebook_extension('bokeh')

dask.config.set({"array.slicing.split_large_chunks": False})
plt.rcParams["figure.dpi"] = 140 
plt.style.use("bmh")
xr.set_options(keep_attrs=True, display_expand_data=False)

gcmdir = "/glade/campaign/cgd/oce/people/bachman/TPOS_1_20_20_year/OUTPUT/"  # MITgcm output directory
stationdirname = gcmdir

%watermark -iv
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/dask_jobqueue/core.py:20: FutureWarning: tmpfile is deprecated and will be removed in a future release. Please use dask.utils.tmpfile instead.
  from distributed.utils import tmpfile
distributed: 2022.7.0
numpy      : 1.22.4
xarray     : 2022.6.0rc0
xgcm       : 0.6.0
matplotlib : 3.5.2
holoviews  : 1.14.9
sys        : 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:04:59) [GCC 10.3.0]
dask       : 2022.7.0
json       : 2.0.9
flox       : 0.5.9
hvplot     : 0.8.0
pump       : 0.1
cf_xarray  : 0.7.4
dcpy       : 0.1.dev385+g121534c
import ncar_jobqueue

if "client" in locals():
    client.close()
    del client
if "cluster" in locals():
    cluster.close()

#env = {"OMP_NUM_THREADS": "3", "NUMBA_NUM_THREADS": "3"}

# cluster = distributed.LocalCluster(
#    n_workers=8,
#    threads_per_worker=1,
#    env=env
# )

if "cluster" in locals():
    del cluster

#cluster = ncar_jobqueue.NCARCluster(
#    project="NCGD0011",
#    scheduler_options=dict(dashboard_address=":9797"),
#)
# cluster = dask_jobqueue.PBSCluster(
#    cores=9, processes=9, memory="108GB", walltime="02:00:00", project="NCGD0043",
#    env_extra=env,
# )

import dask_jobqueue

cluster = dask_jobqueue.PBSCluster(
    cores=1, # The number of cores you want
    memory='12GB', # Amount of memory
    processes=1, # How many processes
    queue='casper', # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='$TMPDIR', # Use your local directory
    resource_spec='select=1:ncpus=1:mem=12GB', # Specify resources
    project='ncgd0011', # Input your project ID here
    walltime='02:00:00', # Amount of wall time
    interface='ib0', # Interface to use
)

cluster.adapt(minimum_jobs=2, maximum_jobs=12)

client = distributed.Client(cluster)
dask.config.set(scheduler=client)
client

Client

Client-cea847b1-12a4-11ed-b1af-3cecef1b12d4

Connection method: Cluster object Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status

Cluster Info

Read data#

TAO#

# Load the gridded TAO mooring product (with ancillary variables) at 140°W,
# keeping data from 1996 onward.
tao_gridded = (
    xr.open_dataset(
        os.path.expanduser("~/work/pump/zarrs/tao-gridded-ancillary.zarr"), chunks="auto", engine="zarr"
    )
    .sel(longitude=-140, time=slice("1996", None))
)
# Mark "depth" as the vertical axis so cf-xarray can identify it.
tao_gridded["depth"].attrs["axis"] = "Z"
# Add derived quantities via the project helper — presumably N², S², EUC depth,
# and ENSO-phase masks (judging by later use of S2/N2T/eucmax) — TODO confirm.
tao_gridded = mixpods.prepare(tao_gridded, oni=pump.obs.process_oni())

tao_gridded.u.cf.plot()
tao_gridded.eucmax.plot()

tao_gridded = (
    tao_gridded.update({
        "n2s2pdf": mixpods.pdf_N2S2(
            tao_gridded[["S2", "N2T"]].drop_vars(["shallowest", "zeuc"])
        ).persist()
    }
    )
)
tao_gridded
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "depth" starting at index 58. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "time" starting at index 139586. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "longitude" starting at index 2. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(

KeyboardInterrupt
tao_Ri = xr.load_dataarray("tao-hourly-Ri-seasonal-percentiles.nc").cf.guess_coord_axis()

MITgcm stations#

stations = pump.model.read_stations_20(stationdirname)

gcmeq = stations.sel(longitude=[-155.025, -140.025, -125.025, -110.025], method="nearest")
#enso = pump.obs.make_enso_mask()
#mitgcm["enso"] = enso.reindex(time=mitgcm.time.data, method="nearest")

#gcmeq["eucmax"] = pump.calc.get_euc_max(gcmeq.u)
# pump.calc.calc_reduced_shear(gcmeq)

#oni = pump.obs.process_oni()
#gcmeq["enso_transition"] = mixpods.make_enso_transition_mask(oni).reindex(time=gcmeq.time.data, method="nearest")


# Pick the 140°W station from the equatorial MITgcm stations.
mitgcm = gcmeq.sel(longitude=-140.025, method="nearest")
# Grid metrics (cell thicknesses etc.) for the station location.
metrics = pump.model.read_metrics(stationdirname)
# xgcm grid: "depth" = cell centers, "RF" = cell interfaces on the Z axis;
# drF/drC are the center/interface thickness metrics.
mitgcm_grid = xgcm.Grid(
    metrics.sel(latitude=mitgcm.latitude, longitude=mitgcm.longitude, method="nearest"),
    coords=({"Z": {"center": "depth", "outer": "RF"}, "Y": {"center": "latitude"}}),
    metrics={"Z": ("drF", "drC")},
    periodic=False,
    boundary="fill",
    fill_value=np.nan,
)

# Derive mixing-related fields, restrict to the equator and the top 250 m
# (Z is negative-down here, hence slice(-250)).
mitgcm = mixpods.prepare(mitgcm, grid=mitgcm_grid, oni=pump.obs.process_oni()).sel(latitude=0, method="nearest").cf.sel(Z=slice(-250))
# Attach the N²-S² joint PDF and persist on the dask cluster.
mitgcm = mitgcm.update({"n2s2pdf": mixpods.pdf_N2S2(mitgcm)}).persist(scheduler=client)
mitgcm
mitgcm.u.cf.plot()
mitgcm.eucmax.cf.plot()
[<matplotlib.lines.Line2D at 0x2b6b605e48b0>]
_images/mixpods-mom6_10_1.png
mixpods.plot_n2s2pdf(mitgcm.n2s2pdf.sel(enso_transition_phase="none"))
<matplotlib.contour.QuadContourSet at 0x2b6b121fe9e0>
_images/mixpods-mom6_11_1.png

MOM6 run : calculate ONI#

dirname = "/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/hist"
static = xr.open_dataset(*glob.glob(f"{dirname}/*static*.nc"))
sfc = xr.open_mfdataset(
    sorted(glob.glob(f"{dirname}/*sfc*")),
    coords="minimal",
    data_vars="minimal",
    compat="override",
    use_cftime=True,
    parallel=True,
)
sfc["time"] = sfc.time + datetime.timedelta(days=365*1957)
#sfc["time"] = sfc.time + xr.coding.cftime_offsets.YearBegin(1957)
sfc.coords.update(static.drop("time"))
#sfc["tos"].attrs["coordinates"] = "geolon geolat"
sst = sfc.cf["sea_surface_temperature"]
sst.cf
Coordinates:
- CF Axes: * X: ['xh']
           * Y: ['yh']
           * T: ['time']
             Z: n/a

- CF Coordinates: * longitude: ['xh']
                  * latitude: ['yh']
                  * time: ['time']
                    vertical: n/a

- Cell Measures:   area: ['areacello']
                   volume: n/a

- Standard Names:   cell_area: ['areacello']

- Bounds:   n/a
# Calculate a monthly average sea surface temperature in the Nino 3.4 region (5°S-5°N, 170°W-120°W).
monthly_ = (
    sst.cf.sel(latitude=slice(-5, 5), longitude=slice(-170, -120))
    .cf.mean(["X", "Y"])
    .resample(time="M")
    .mean()
    #.sel(time=slice("1960", "1999-12-31"))
    #.reindex(
    #    time=xr.cftime_range("1955-01-01", monthly.time[-1].dt.strftime("%Y-%m-%d").data.item(), freq="MS")
    #)
    .load()
)

monthly = (
    monthly_
    .reindex(
        time=xr.cftime_range(
            "1956-01-01",
            monthly_.time[-1].dt.strftime("%Y-%m-%d").data.item(), 
            freq="M", 
            calendar=monthly_.time.dt.calendar,
        )
    )
    .convert_calendar("gregorian", use_cftime=False)
)
#monthly = monthly_
monthly.plot()
[<matplotlib.lines.Line2D at 0x2b6b0ffe3880>]
_images/mixpods-mom6_15_1.png
# Attach the ONI index and the ENSO-transition mask derived from it.
monthly.coords["oni"] = mixpods.calc_oni(monthly)
monthly.coords["enso_transition_mask"] = mixpods.make_enso_transition_mask(monthly.oni)
# NOTE(review): `oniobs` and `onimom6` are not defined anywhere in this notebook
# as shown — presumably built from pump.obs.process_oni() and `monthly`
# respectively; confirm before rerunning.
(
    oniobs.hvplot.line(x="time", label="obs")
    * onimom6.hvplot.line(x="time", label="MOM6")
).opts(ylabel="ONI [°C]")
oniobs.enso_transition_mask.plot()
onimom6.enso_transition_mask.plot(color='r')
[<matplotlib.lines.Line2D at 0x2b6d7fd20eb0>]
_images/mixpods-mom6_18_1.png

MOM6 sections#

import mom6_tools
import xgcm
from mom6_tools.sections import combine_nested, read_raw_files
from mom6_tools.wright_eos import wright_eos


def combine_variables_by_coords(dsets):
    """Merge datasets variable-by-variable using ``combine_by_coords``.

    For every data variable present in *any* input dataset, gather that
    variable from all datasets that contain it (skipping datasets with an
    empty time dimension), combine those pieces by coordinates, and merge
    the per-variable results into one Dataset.

    Parameters
    ----------
    dsets : list of xr.Dataset
        Tile datasets; variables need not be present in every tile.

    Returns
    -------
    xr.Dataset
    """
    import itertools

    import tqdm

    # Union of variable names across all inputs.
    all_vars = set(itertools.chain(*[ds.data_vars for ds in dsets]))

    merged = xr.merge(
        xr.combine_by_coords(
            # FIX: guard with `var in ds` — `all_vars` is a union, so some
            # datasets may lack `var` and `ds[var]` would raise KeyError.
            [ds[var] for ds in dsets if var in ds and ds.sizes["time"] > 0],
            combine_attrs="override",
        )
        for var in tqdm.tqdm(all_vars)
    )

    return merged
import glob

# dirname = "/glade/scratch/gmarques/gmom.e23.GJRAv3.TL319_t061_zstar_N65.mixpods.001/run"

dirname = "/glade/scratch/dcherian/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/run/"
files = sorted(glob.glob(f"{dirname}/*TAO*140W*.nc.*"))

lons = [140]  # [90, 110, 140, 155, 170]  # TODO: Add 125
dsets = read_raw_files(files, parallel=True)
combined = combine_variables_by_coords(dsets)

#mom6tao = read_tao_sections(dirname)

def interp_to_center(ds):
    """Average velocity-point (``xq``) values on to tracer points (``xh``).

    Picks the two ``xq`` faces bracketing the (single) tracer point, averages
    them with a 2-point coarsen, then relabels the result so every variable
    lives on the ``xh`` dimension.

    Notes
    -----
    The face indices ``[1, 2]`` are hard-coded for a single-longitude section;
    for multiple longitudes they would need generalizing (pairs like
    ``[1, 2, 5, 6, 8, 9, ...]``).
    """
    # (Removed dead code: ix0/ix1/indices and the toolz import were computed
    # but never used — the hard-coded isel below was what actually ran.)
    out = (
        ds
        .isel(xq=[1, 2])
        .coarsen(xq=2).mean()
    )
    # Snap the averaged xq locations on to the nearest xh point.
    out = out.sel(xh=out.xq.data, method="nearest", tolerance=0.05)

    # Move xq-dimensioned variables on to the xh dimension.
    for var in out:
        if "xq" in out[var].dims:
            out[var] = out[var].rename({"xq": "xh"}).drop("xh")
    return out.drop_vars("xq")


mom6tao = interp_to_center(combined).cf.chunk({"Y": -1})
mom6tao["dens"] = wright_eos(mom6tao.thetao, mom6tao.so, 0)
mom6tao["dens"].attrs.update(
    {"units": "kg/m^3", "standard_name": "sea_water_potential_density"}
)
mom6tao
100%|██████████| 21/21 [02:21<00:00,  6.76s/it]
<xarray.Dataset>
Dimensions:           (time: 480600, yh: 49, xh: 1, zi: 66, zl: 65, nv: 2,
                       yq: 49)
Coordinates:
  * time              (time) object 0001-01-06 00:30:00 ... 0056-01-05 23:30:00
  * yh                (yh) float64 -5.911 -5.664 -5.417 ... 5.541 5.788 6.035
  * xh                (xh) float64 -140.0
  * zi                (zi) float64 0.0 2.5 5.0 7.5 ... 5.503e+03 5.751e+03 6e+03
  * zl                (zl) float64 1.25 3.75 6.25 ... 5.627e+03 5.876e+03
  * nv                (nv) float64 1.0 2.0
  * yq                (yq) float64 -6.035 -5.788 -5.541 ... 5.417 5.664 5.911
Data variables: (12/22)
    average_T2        (time) object dask.array<chunksize=(8640,), meta=np.ndarray>
    KPP_kheat         (time, zi, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
    volcello          (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
    net_heat_surface  (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
    Kv_u              (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
    time_bnds         (time, nv) timedelta64[ns] dask.array<chunksize=(8640, 2), meta=np.ndarray>
    ...                ...
    zos               (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
    KPP_OBLdepth      (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
    vo                (time, zl, yq, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
    average_DT        (time) timedelta64[ns] dask.array<chunksize=(8640,), meta=np.ndarray>
    tauy              (time, yq, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
    dens              (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>

Save#

# Persist the combined TAO section to zarr, rechunked to ~1 year of hourly data.
mom6tao.chunk({"time": 24*365}).to_zarr(
    # FIX: was an f-string with no placeholders (extraneous `f` prefix).
    "/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/moorings/tao.zarr",
    mode="w",
    consolidated=True,
)
<xarray.backends.zarr.ZarrStore at 0x2b6dbf987d80>

Read sections#

mom6tao = (
    xr.open_dataset(
        "/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/moorings/tao.zarr",
        engine="zarr"
    )
    .drop_vars(["average_T1", "average_T2"])
    .chunk("auto")
)
mom6tao["time"] = mom6tao.indexes["time"].to_datetimeindex()
from mom6_tools.wright_eos import wright_eos

mom6tao["densT"] = wright_eos(mom6tao.thetao, 35, 0)
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/coding/times.py:682: SerializationWarning: Unable to decode time axis into full numpy.datetime64 objects, continuing using cftime.datetime objects instead, reason: dates out of range
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/coding/times.py:360: FutureWarning: Index.ravel returning ndarray is deprecated; in a future version this will return a view on self.
  sample = dates.ravel()[0]
/glade/scratch/dcherian/tmp/ipykernel_110208/2270305083.py:9: RuntimeWarning: Converting a CFTimeIndex with dates from a non-standard calendar, 'noleap', to a pandas.DatetimeIndex, which uses dates from the standard calendar.  This may lead to subtle errors in operations that depend on the length of time between dates.
  mom6tao["time"] = mom6tao.indexes["time"].to_datetimeindex()
# xgcm grid for the MOM6 TAO section: zl = layer centers, zi = layer
# interfaces; yq is the staggered (left) latitude axis. Layer thickness `h`
# serves as the vertical metric.
grid = xgcm.Grid(
    mom6tao,
    coords={"Z": {"outer": "zi", "center": "zl"}, 
            "X": {"center": "xh"},
            "Y": {"center": "yh", "left": "yq"},
           },
    periodic=False,
    boundary="fill",
    fill_value=np.nan,
    metrics = {("Z",): "h"}, 
)
grid
<xgcm.Grid>
Z Axis (not periodic, boundary='fill'):
  * outer    zi --> center
  * center   zl --> outer
X Axis (not periodic, boundary='fill'):
  * center   xh
Y Axis (not periodic, boundary='fill'):
  * center   yh --> left
  * left     yq --> center
mom6tao = mixpods.prepare(mom6tao, grid, monthly)
mom6tao
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xgcm/grid.py:1471: UserWarning: Metric at ('time', 'zi', 'yh', 'xh') being interpolated from metrics at dimensions ('time', 'zl', 'yh', 'xh'). Boundary value set to 'extend'.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xgcm/grid.py:1471: UserWarning: Metric at ('time', 'zi', 'yh', 'xh') being interpolated from metrics at dimensions ('time', 'zl', 'yh', 'xh'). Boundary value set to 'extend'.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xgcm/grid.py:1471: UserWarning: Metric at ('time', 'zi', 'yq', 'xh') being interpolated from metrics at dimensions ('time', 'zl', 'yh', 'xh'). Boundary value set to 'extend'.
  warnings.warn(
<xarray.Dataset>
Dimensions:           (time: 480600, yh: 49, xh: 1, zi: 66, zl: 65, nv: 2,
                       yq: 49)
Coordinates:
  * nv                (nv) float64 1.0 2.0
  * time              (time) datetime64[ns] 1958-01-06T00:30:00 ... 2013-01-0...
  * xh                (xh) float64 -140.0
  * yh                (yh) float64 -5.911 -5.664 -5.417 ... 5.541 5.788 6.035
  * yq                (yq) float64 -6.035 -5.788 -5.541 ... 5.417 5.664 5.911
  * zi                (zi) float64 0.0 2.5 5.0 7.5 ... 5.503e+03 5.751e+03 6e+03
  * zl                (zl) float64 1.25 3.75 6.25 ... 5.627e+03 5.876e+03
    oni               (time) float32 nan nan nan nan nan ... nan nan nan nan nan
    warm_mask         (time) bool True True True True ... True True True True
    cool_mask         (time) bool False False False False ... False False False
    enso_transition   (time) <U12 '____________' ... '____________'
Data variables: (12/26)
    KPP_OBLdepth      (time, yh, xh) float32 dask.array<chunksize=(480600, 49, 1), meta=np.ndarray>
    KPP_buoyFlux      (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
    KPP_kheat         (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
    Kd_heat           (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
    Kv_u              (time, zl, yh, xh) float32 dask.array<chunksize=(9612, 65, 49, 1), meta=np.ndarray>
    SW                (time, yh, xh) float32 dask.array<chunksize=(480600, 49, 1), meta=np.ndarray>
    ...                ...
    densT             (time, zl, yh, xh) float32 dask.array<chunksize=(9612, 65, 49, 1), meta=np.ndarray>
    N2T               (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 49, 1), meta=np.ndarray>
    S2                (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
    shred2            (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
    Rig_T             (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
    eucmax            (time, xh) float64 dask.array<chunksize=(9612, 1), meta=np.ndarray>
mom6140 = mom6tao.cf.sel(longitude=-140, latitude=0, method="nearest").cf.sel(Z=slice(150))
mom6140 = mom6140.update(
    {"n2s2pdf": mixpods.pdf_N2S2(mom6140[["S2", "N2T", "eucmax"]])}
)
mom6140[["uo", "eucmax", "n2s2pdf", "S2", "N2T"]].load()
<xarray.Dataset>
Dimensions:                (time: 480600, zl: 21, N2T_bins: 29, S2_bins: 29,
                            enso_transition_phase: 7, zi: 22)
Coordinates: (12/14)
  * time                   (time) datetime64[ns] 1958-01-06T00:30:00 ... 2013...
    xh                     float64 -140.0
    yh                     float64 0.0625
    yq                     float64 -0.0625
  * zi                     (zi) float64 0.0 2.5 5.0 7.5 ... 120.7 133.7 147.6
  * zl                     (zl) float64 1.25 3.75 6.25 ... 114.5 127.2 140.6
    ...                     ...
    cool_mask              (time) bool False False False ... False False False
    enso_transition        (time) <U12 '____________' ... '____________'
  * N2T_bins               (N2T_bins) object (-5.0, -4.896551724137931] ... (...
  * S2_bins                (S2_bins) object (-5.0, -4.896551724137931] ... (-...
  * enso_transition_phase  (enso_transition_phase) object 'none' ... 'all'
    bin_areas              (N2T_bins, S2_bins) float64 0.0107 0.0107 ... 0.0107
Data variables:
    uo                     (time, zl) float32 -0.4169 -0.3548 ... 0.9186 0.8345
    eucmax                 (time) float64 114.5 114.5 114.5 ... 127.2 127.2
    n2s2pdf                (N2T_bins, S2_bins, enso_transition_phase) float64 ...
    S2                     (time, zi) float32 nan 0.000642 ... 6.569e-05
    N2T                    (time, zi) float32 nan 0.0001751 ... 0.0001191
mom6140.uo.cf.plot(robust=True)
mom6140.eucmax.cf.plot()
[<matplotlib.lines.Line2D at 0x2b6ae1ca6fb0>]
_images/mixpods-mom6_33_1.png
mixpods.plot_n2s2pdf(mom6140.n2s2pdf.sel(enso_transition_phase="none"))
<matplotlib.contour.QuadContourSet at 0x2b6b61cf1c00>
_images/mixpods-mom6_34_1.png

Pick simulations#

datasets = {"TAO": tao_gridded, "MITgcm": mitgcm, "MOM6": mom6140}

Compare EUC maximum#

mixpods.plot_eucmax_timeseries(datasets, obs="TAO")

Mean profiles#

kwargs = dict(show_grid=True, invert_axes=True, legend_position="right", frame_width=300, frame_height=500, 
              ylim=(-1e-3, 1.9e-3), xlim=(-250, 0))

S2 = mixpods.map_hvplot(lambda ds, name: mixpods.plot_profile_fill(ds.S2.load(), name), datasets).opts(**kwargs, ylabel="S^2")
N2 = mixpods.map_hvplot(lambda ds, name: mixpods.plot_profile_fill(4 * ds.N2T.load(), name), datasets).opts(**kwargs, ylabel="4N^2")
Ri = (
    mixpods.map_hvplot(lambda ds, name: mixpods.cfplot(ds.Rig_T.median("time"), name), datasets)
    * hv.HLine(0.25).opts(color='k', line_width=0.5)
).opts(**kwargs, ylabel="median Ri_g^T").opts(ylim=(0,1))
(S2 + N2 + Ri)

Remap to EUC coordinate#

gcmeq.coords["zeuc"] = gcmeq.depth - gcmeq.eucmax
remapped = flox.xarray.xarray_reduce(
    gcmeq.drop_vars(["SSH", "KPPhbl", "mld", "eucmax"], errors="ignore"),
    "zeuc",
    dim="depth",
    func="mean",
    expected_groups=(np.arange(-250, 250.1, 5),),
    isbin=(True,),
)
remapped["zeuc_bins"].attrs["units"] = "m"
remapped
<xarray.Dataset>
Dimensions:          (time: 174000, longitude: 4, zeuc_bins: 100)
Coordinates:
    latitude         float64 0.025
  * longitude        (longitude) float64 -155.0 -140.0 -125.0 -110.0
  * time             (time) datetime64[ns] 1998-12-31T18:00:00 ... 2018-11-06...
    cool_mask        (time) bool True True True True ... False False False False
    warm_mask        (time) bool False False False False ... True True True True
  * zeuc_bins        (zeuc_bins) object (-250.0, -245.0] ... (245.0, 250.0]
Data variables: (12/25)
    DFrI_TH          (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPdiffKzT       (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPg_TH          (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPviscAz        (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    VISrI_Um         (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    VISrI_Vm         (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    ...               ...
    S2               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    shear            (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    N2               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    shred2           (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    Ri               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    enso_transition  (zeuc_bins, time) <U12 'La-Nina cool' ... 'El-Nino warm'
Attributes:
    easting:   longitude
    northing:  latitude
    title:     Station profile, index (i,j)=(1201,240)
remapped = remapped.persist()
cluster.scale(6)

Seasonal median Ri: (0, 140)#

remapped["longitude"] = [-155.0, -140, -125, -110]
fg = (
    remapped.Ri.groupby("time.season").median()
    .sel(season=["DJF", "MAM", "JJA", "SON"], longitude=[-140, -110])
    .cf.plot(col="season", row="longitude", xlim=(0,1.5), ylim=(-20, None), label="MITgcm")
)
fg.data = tao_Ri.cf.sel(quantile=0.5, longitude=[-140, -110])
fg.map_dataarray_line(xr.plot.line, x=None, y="zeuc", hue="season", color='k', label="TAO")
fg.map(lambda: plt.axvline(0.25))
fg.axes[-1, -1].legend(bbox_to_anchor=(1.5, 1))
<matplotlib.legend.Legend at 0x2add93b58220>
_images/mixpods-mom6_49_1.png

Stability diagram: 4N2-S2 plane#

Warner & Moum (2019):

Both velocity and temperature are hourly averaged before interpolation to 5-m vertical bins

Contours enclose 50% of data

mixpods.plot_stability_diagram_by_dataset(datasets)
_images/mixpods-mom6_51_0.png

Questions:

  1. internal wave spectrum?

  2. minor axis is smaller for model, particularly MITgcm-> too diffusive?

  3. Can we think of this as the model relaxing to critical Ri too quickly?

mixpods.plot_stability_diagram_by_phase(datasets, obs="TAO", fig=None)
_images/mixpods-mom6_53_0.png

Composed#

fig = plt.figure(constrained_layout=True, figsize=(12, 6))
subfigs = fig.subfigures(2, 1, height_ratios=[1.2, 1])
top = subfigs[0].subfigures(1, 3, width_ratios=[1, 5, 1])

mixpods.plot_stability_diagram_by_dataset(datasets, fig=top[1])
mixpods.plot_stability_diagram_by_phase(datasets, fig=subfigs[1])
_images/mixpods-mom6_55_0.png
(   
    tao_gridded.n2s2pdf
    .sel(enso_transition_phase=["La-Nina cool", "La-Nina warm", "El-Nino warm", "El-Nino cool"])
    .sum("N2_bins")
    .plot(hue="enso_transition_phase")
)
plt.figure()
(
    tao_gridded.n2s2pdf
    .sel(enso_transition_phase=["La-Nina cool", "La-Nina warm", "El-Nino warm", "El-Nino cool"])
    .sum("S2_bins")
    .plot(hue="enso_transition_phase")
)
[<matplotlib.lines.Line2D at 0x2b36e2bc2ad0>,
 <matplotlib.lines.Line2D at 0x2b36e2bc32e0>,
 <matplotlib.lines.Line2D at 0x2b36e2bc3310>,
 <matplotlib.lines.Line2D at 0x2b36e0ca52d0>]
_images/mixpods-mom6_56_1.png _images/mixpods-mom6_56_2.png
oni = pump.obs.process_oni().sel(time=slice("2005-Oct", "2015"))
(
    oni.hvplot.step(color='k')
+ pump.obs.make_enso_transition_mask().sel(time=slice("2005-Jun", "2015")).reset_coords(drop=True).hvplot.step()
).cols(1)

Seasonal mean heat flux#

remapped.Jq.load()
<xarray.DataArray 'Jq' (time: 174000, zeuc: 100)>
array([[        nan,         nan, -0.07841657, ...,         nan,
                nan,         nan],
       [        nan,         nan, -0.07973828, ...,         nan,
                nan,         nan],
       [        nan,         nan, -0.08094554, ...,         nan,
                nan,         nan],
       ...,
       [        nan, -0.07447129,         nan, ...,         nan,
                nan,         nan],
       [        nan, -0.07471326,         nan, ...,         nan,
                nan,         nan],
       [        nan, -0.07509062,         nan, ...,         nan,
                nan,         nan]])
Coordinates:
    latitude   float64 0.025
    longitude  float64 -140.0
  * time       (time) datetime64[ns] 1998-12-31T18:00:00 ... 2018-11-06T17:00:00
  * zeuc       (zeuc) float64 -247.5 -242.5 -237.5 -232.5 ... 237.5 242.5 247.5
import hvplot.xarray
(
    remapped.Jq.groupby("time.season").mean()
    .reindex(season=["DJF", "MAM", "JJA", "SON"])
    .cf.plot.line(col="season")
)
<xarray.plot.facetgrid.FacetGrid at 0x2affc38316c0>
_images/mixpods-mom6_61_1.png

Test out available variables#

  1. KPP_Kheat is non-zero only in KPP_OBL

(mom6140.Tflx_dia_diff * 1025 * 4000).cf.plot(robust=True)
<matplotlib.collections.QuadMesh at 0x2ba58a988fa0>
_images/mixpods-mom6_63_1.png
mom6140.Kv_u.cf.plot(norm=mpl.colors.LogNorm(1e-6, 1e-2))
<matplotlib.collections.QuadMesh at 0x2ba58a007cd0>
_images/mixpods-mom6_64_1.png
mom6140.h.median("time").plot()
mom6140.h.mean("time").plot()
[<matplotlib.lines.Line2D at 0x2ba58a547c10>]
_images/mixpods-mom6_65_1.png
# Total heat diffusivity with the KPP boundary-layer depth overlaid.
mom6140.Kd_heat.cf.plot(norm=mpl.colors.LogNorm(1e-6, 1e-2))
mom6140.KPP_OBLdepth.plot(color='r')

# KPP's own heat diffusivity for comparison.
plt.figure()
mom6140.KPP_kheat.cf.plot(norm=mpl.colors.LogNorm(1e-6, 1e-2))
mom6140.KPP_OBLdepth.plot(color='r')

# Difference between the KPP and total diffusivities.
plt.figure()
# FIX: `mpl.cm.mpl.cm.Reds_r` raises AttributeError — the colormap is mpl.cm.Reds_r.
(mom6140.KPP_kheat - mom6140.Kd_heat).cf.plot(robust=True, cmap=mpl.cm.Reds_r, vmax=0)
<matplotlib.collections.QuadMesh at 0x2ba58a8c38b0>
_images/mixpods-mom6_66_1.png _images/mixpods-mom6_66_2.png _images/mixpods-mom6_66_3.png

Experiment with manual combining#

import intake
from intake.source.utils import reverse_format

# FIX: `pathlib` was used below but never imported in this notebook.
import pathlib

# Parse year and tile id out of the first few filenames, e.g. "*__1999.nc.0042".
years = []
tiles = []
for file in files[:10]:
    p = pathlib.Path(file)
    params = reverse_format("__{year}.nc.{tile}", p.stem + p.suffix)
    years.append(params["year"])
    tiles.append(params["tile"])
years, tiles

import toolz as tlz

bytile = {}
for tile, v in tlz.groupby(tileid, files).items():
    bytile[tile] = xr.concat(read_raw_files(v, parallel=True), dim="time")



# Check whether all tiles share the same latitude (yh) coordinate by hashing it.
# FIX: hash(list) raises TypeError (lists are unhashable) and str.join needs
# strings, not ints — hash a tuple and stringify each hash before joining.
print("\n".join(str(hash(tuple(ds.yh.data.tolist()))) for ds in bytile.values()))

from functools import partial


def hash_coords(ds, axis):
    """Return a hash of the concatenated coordinate values along a CF axis.

    Datasets with identical coordinates along ``axis`` hash equal, so this
    works as a groupby key when gluing tiles together.
    """
    pieces = [ds[dim].data for dim in ds.cf.axes[axis]]
    combined = np.concatenate(pieces)
    return hash(tuple(combined))


# Start from the per-tile datasets and glue them together axis by axis.
grouped = bytile

# Group tiles that share identical coordinates along `axis`, then concatenate
# each group along the perpendicular axis.
for axis, concat_axis in [("X", "Y"), ("Y", "X")]:
    grouped = tlz.groupby(partial(hash_coords, axis=axis), grouped.values())
    grouped = {
        k: cfconcat(v, axis=concat_axis, join='exact') for k, v in grouped.items()
    }
    # Experiment in progress: stop after the first (X-grouping) pass.
    break 
grouped

# NOTE(review): `cfconcat` is not defined anywhere in this notebook as shown —
# confirm where it comes from (dcpy/pump helper?) before rerunning.
cfconcat(list(grouped.values()), "X")

combined = xr.combine_by_coords(list(bytile.values()), combine_attrs="override")

def raise_if_bad_index(combined):
    """Raise ValueError if any index of ``combined`` is non-monotonic or duplicated.

    Sanity check after combining tiles: a non-monotonic or duplicated index
    usually means tiles overlapped or were concatenated in the wrong order.

    Parameters
    ----------
    combined : xr.Dataset
        The combined dataset whose indexes are validated.

    Raises
    ------
    ValueError
        Listing every offending index name.
    """
    bad = []
    for name in combined.indexes:
        index = combined.indexes[name]
        # `Index.is_monotonic` is deprecated in pandas; use the explicit form.
        if not index.is_monotonic_increasing or index.has_duplicates:
            bad.append(name)
    if bad:
        # FIX: the message previously interpolated the loop variable (the last
        # index inspected) instead of the collected list of bad indexes.
        raise ValueError(f"Indexes {bad} are either not monotonic or have duplicates")

def tileid(path):
    """Return the integer tile number from a filename like ``*.nc.0042``.

    The tile id is the final numeric suffix after ``.nc`` (e.g. ``.0042`` -> 42).
    """
    # FIX: pathlib was never imported at module level in this notebook;
    # import locally so the function is self-contained.
    import pathlib

    return int(pathlib.Path(path).suffix[1:])
    
# FIX: `groupby` was undefined here (itertools.groupby was intended —
# note the files are pre-sorted by the same key, as groupby requires),
# and the loop variable shadowed/clobbered the `files` list; renamed it.
from itertools import groupby

sorted_files = sorted(files, key=tileid)
for tile, tile_files in groupby(sorted_files, tileid):
    print(tile, len(list(tile_files)))

Test out ONI calculation: close enough!#

ersst = xr.tutorial.open_dataset("ersstv5")

monthlyersst = (
    ersst.sst.cf.sortby("Y")
    .cf.sel(latitude=slice(-5, 5), longitude=slice(360-170, 360-120))
    .cf.mean(["X", "Y"])
    .resample(time="M").mean()
    .load()
)

expected = mixpods.calc_oni(monthlyersst)

actual = pump.obs.process_oni()
actual.plot()
expected.plot()

(actual-expected).plot()
[<matplotlib.lines.Line2D at 0x2b6d2ec04580>]
_images/mixpods-mom6_70_1.png